0. Load Libraries and Define Functions
# 0.0 Load libraries ----
# Loads tidyquant, lubridate, xts, quantmod, TTR, and PerformanceAnalytics
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidyquant)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## ── Attaching core tidyquant packages ──────────────────────── tidyquant 1.0.9 ──
## ✔ PerformanceAnalytics 2.0.4 ✔ TTR 0.24.4
## ✔ quantmod 0.4.26 ✔ xts 0.14.0── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
## ✖ zoo::as.Date() masks base::as.Date()
## ✖ zoo::as.Date.numeric() masks base::as.Date.numeric()
## ✖ dplyr::filter() masks stats::filter()
## ✖ xts::first() masks dplyr::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ xts::last() masks dplyr::last()
## ✖ PerformanceAnalytics::legend() masks graphics::legend()
## ✖ quantmod::summary() masks base::summary()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(broom)
#0.1 Define functions ----
# The following functions compute yearly, monthly, weekly and daily log returns from the adjusted price column
# Yearly log returns.
# Hands `stock.returns` to tq_transmute(), which applies periodReturn()
# (type = "log", period = "yearly") to the `adjusted` column.
# Returns whatever tq_transmute() returns for that call.
get_annual_returns <- function(stock.returns) {
  tq_transmute(
    data = stock.returns,
    select = adjusted,
    mutate_fun = periodReturn,
    type = "log",
    period = "yearly"
  )
}
# Monthly log returns.
# Hands `stock.returns` to tq_transmute(), which applies periodReturn()
# (type = "log", period = "monthly") to the `adjusted` column.
# Returns whatever tq_transmute() returns for that call.
get_monthly_returns <- function(stock.returns) {
  tq_transmute(
    data = stock.returns,
    select = adjusted,
    mutate_fun = periodReturn,
    type = "log",
    period = "monthly"
  )
}
# Weekly log returns.
# Hands `stock.returns` to tq_transmute(), which applies periodReturn()
# (type = "log", period = "weekly") to the `adjusted` column.
# Returns whatever tq_transmute() returns for that call.
get_weekly_returns <- function(stock.returns) {
  tq_transmute(
    data = stock.returns,
    select = adjusted,
    mutate_fun = periodReturn,
    type = "log",
    period = "weekly"
  )
}
# Daily log returns.
# Hands `stock.returns` to tq_transmute(), which applies periodReturn()
# (type = "log", period = "daily") to the `adjusted` column.
# Returns whatever tq_transmute() returns for that call.
get_daily_returns <- function(stock.returns) {
  tq_transmute(
    data = stock.returns,
    select = adjusted,
    mutate_fun = periodReturn,
    type = "log",
    period = "daily"
  )
}
1. Collect stock price data from Yahoo ----
1.1 Get data on all stocks in the SP500 ----
# Fetch the SP500 index holdings with tq_index() and display the result.
# The table has one row per holding with columns symbol, company, identifier,
# sedol, weight, sector, shares_held and local_currency.
stocks_tbl <- tq_index("SP500")
## Getting holdings for SP500
stocks_tbl
## # A tibble: 505 × 8
## symbol company identifier sedol weight sector shares_held local_currency
## <chr> <chr> <chr> <chr> <dbl> <chr> <dbl> <chr>
## 1 AAPL APPLE INC 037833100 2046… 0.0692 - 171767010 USD
## 2 MSFT MICROSOFT C… 594918104 2588… 0.0655 - 88569779 USD
## 3 NVDA NVIDIA CORP 67066G104 2379… 0.0569 - 293126746 USD
## 4 AMZN AMAZON.COM … 023135106 2000… 0.0349 - 109092981 USD
## 5 META META PLATFO… 30303M102 B7TL… 0.0240 - 26127829 USD
## 6 GOOGL ALPHABET IN… 02079K305 BYVY… 0.0190 - 70032798 USD
## 7 BRK-B BERKSHIRE H… 084670702 2073… 0.0181 - 21584443 USD
## 8 GOOG ALPHABET IN… 02079K107 BYY8… 0.0159 - 58262441 USD
## 9 LLY ELI LILLY +… 532457108 2516… 0.0157 - 9507966 USD
## 10 JPM JPMORGAN CH… 46625H100 2190… 0.0135 - 34237071 USD
## # ℹ 495 more rows
# # Symbol "BRK.B" gives error from tq_get
# # Change to "BRK-B"
# rowindex_0<-which(stocks_tbl$symbol=="BRK.B")
# stocks_tbl$symbol[rowindex_0]<-"BRK-B"
#
# # Change to "BF.B" to "BF-B"
# rowindex_0<-which(stocks_tbl$symbol=="BF.B")
# stocks_tbl$symbol[rowindex_0]<-"BF-B"
#
# Drop the placeholder row whose symbol is "-".
# index_dash keeps the positions of those rows for reference.
index_dash <- which(stocks_tbl$symbol == "-")
# Rows are removed with a logical mask rather than stocks_tbl[-index_dash, ]:
# when no symbol equals "-", index_dash is integer(0) and a negative index
# built from it selects zero rows, which would silently discard every stock.
# %in% never yields NA, so rows with a missing symbol are kept as before.
stocks_tbl0 <- stocks_tbl[!(stocks_tbl$symbol %in% "-"), ]
# Collect all symbols in stocks_tbl0
stocks_symbols <- stocks_tbl0$symbol
1.2 Set start and end dates for price data ----
# Start and end of the price history window, as "YYYY-MM-DD" strings.
date_start <- "2019-01-01"
date_end <- "2024-08-31"
# Collecting data for a large group of stocks can take
# a significant amount of time
# (on 9/12/2023, less than 3 minutes).
# To monitor progress, we split the large group into subgroups.
1.3 Split large group of stocks into subgroups ----
# Number of symbols requested per tq_get() call.
group_size <- 50
# Label consecutive blocks of group_size rows with group ids 0, 1, 2, ...
# seq_len() is used instead of 1:nrow() so that an empty table produces no
# groups rather than the index vector c(1, 0).
stocks_tbl0$group <- floor((seq_len(nrow(stocks_tbl0)) - 1) / group_size)
list_group <- unique(stocks_tbl0$group)
# for (group0 in list_group[-c(1:4)]) {  # variant that skips the first four groups
for (group0 in list_group) {
  # Download prices for the symbols of this group only.
  stocks_data <- stocks_tbl0 %>%
    dplyr::filter(group == group0) %>%
    dplyr::select(symbol, company) %>%
    tq_get(from = date_start, to = date_end)
  # Keep each group's result under its own name: stocks_data.<group id>.
  assign(paste0("stocks_data.", group0), stocks_data)
  # Progress report, printed once per finished group.
  print(c("End for group ", as.character(group0)))
}
## [1] "End for group " "0"
## [1] "End for group " "1"
## [1] "End for group " "2"
## [1] "End for group " "3"
## [1] "End for group " "4"
## [1] "End for group " "5"
## [1] "End for group " "6"
## [1] "End for group " "7"
## [1] "End for group " "8"
## [1] "End for group " "9"
## [1] "End for group " "10"
# If any warnings occur, determine which symbol generated the message.
# Revise the symbol name (as above, replacing "." by "-") or delete it,
# as was the case for symbol = "-", which corresponded to the US dollar.
1.4 Bind together data from all groups
# Stack the per-group downloads (stocks_data.0, stocks_data.1, ...) into one
# long table. The pieces are looked up by name from list_group instead of
# being listed by hand, so the bind still covers every group when the number
# of groups is not exactly 11. mget() stops with an error if a piece is
# missing; unname() makes the call equivalent to rbind(piece0, piece1, ...).
stocks_data <- do.call(
  rbind,
  unname(mget(paste0("stocks_data.", list_group), envir = environment()))
)
dim(stocks_data)
## [1] 707653 9
1.5 Convert stocks_data to time series matrix
# Long table of prices: rows without an adjusted price are dropped, and only
# symbol, date and the adjusted price (exposed as `price`) are kept.
stocks_data1 <- stocks_data %>%
  dplyr::filter(!is.na(adjusted)) %>%
  dplyr::select(symbol, date, price = adjusted)
# Wide form: a date column followed by one price column per symbol.
stocks_tsmatrix <- tidyr::pivot_wider(
  stocks_data1,
  names_from = symbol,
  values_from = price
)
dim(stocks_tsmatrix)
## [1] 1426 504
# On 9/10/2024
# dim(stocks_tsmatrix)
# [1] 1426 504
names(stocks_tsmatrix)
## [1] "date" "AAPL" "MSFT" "NVDA" "AMZN" "META" "GOOGL" "BRK-B" "GOOG"
## [10] "LLY" "JPM" "AVGO" "TSLA" "UNH" "XOM" "V" "PG" "JNJ"
## [19] "MA" "COST" "HD" "ABBV" "WMT" "MRK" "NFLX" "KO" "BAC"
## [28] "ADBE" "PEP" "CVX" "CRM" "TMO" "AMD" "ORCL" "LIN" "ACN"
## [37] "MCD" "ABT" "PM" "CSCO" "WFC" "IBM" "TXN" "GE" "QCOM"
## [46] "VZ" "DHR" "INTU" "NOW" "AMGN" "ISRG" "NEE" "PFE" "SPGI"
## [55] "CAT" "DIS" "RTX" "GS" "CMCSA" "T" "UNP" "AMAT" "PGR"
## [64] "UBER" "AXP" "LOW" "TJX" "HON" "BKNG" "ELV" "COP" "LMT"
## [73] "MS" "BLK" "SYK" "VRTX" "BSX" "REGN" "MDT" "PLD" "CB"
## [82] "ETN" "MMC" "C" "ADP" "AMT" "PANW" "ADI" "SBUX" "MDLZ"
## [91] "CI" "FI" "TMUS" "DE" "BX" "BMY" "GILD" "SO" "NKE"
## [100] "KLAC" "LRCX" "MU" "SCHW" "BA" "UPS" "MO" "ICE" "DUK"
## [109] "CL" "ZTS" "SHW" "ANET" "INTC" "EQIX" "KKR" "CME" "TT"
## [118] "WM" "AON" "WELL" "MCO" "HCA" "PH" "CMG" "NOC" "MSI"
## [127] "PNC" "PYPL" "APH" "TDG" "CVS" "MMM" "SNPS" "USB" "CTAS"
## [136] "TGT" "EOG" "CDNS" "BDX" "GD" "ITW" "ORLY" "MCK" "CSX"
## [145] "AJG" "FDX" "ECL" "APD" "CARR" "NXPI" "ROP" "NEM" "NSC"
## [154] "FCX" "SLB" "MPC" "CRWD" "EMR" "TFC" "AFL" "DHI" "PSA"
## [163] "CEG" "GEV" "TRV" "MAR" "O" "ADSK" "AEP" "COF" "PSX"
## [172] "WMB" "GM" "AZO" "OKE" "HLT" "SPG" "SRE" "ABNB" "CCI"
## [181] "ROST" "BK" "KMB" "PCAR" "ALL" "AIG" "DLR" "D" "FTNT"
## [190] "URI" "FIS" "JCI" "MET" "LEN" "KVUE" "TEL" "MSCI" "IQV"
## [199] "FICO" "VLO" "AMP" "LHX" "CPRT" "GWW" "GIS" "PAYX" "PCG"
## [208] "RSG" "F" "PRU" "ACGL" "HUM" "KMI" "MCHP" "STZ" "CMI"
## [217] "A" "PEG" "MPWR" "IDXX" "EW" "COR" "CTVA" "SYY" "VRSK"
## [226] "FAST" "KDP" "EXC" "IT" "CTSH" "AME" "RCL" "CNC" "YUM"
## [235] "OTIS" "EXR" "HWM" "RMD" "EFX" "PWR" "ED" "GEHC" "MNST"
## [244] "DOW" "HES" "IR" "EA" "HIG" "VICI" "XEL" "CBRE" "OXY"
## [253] "KR" "ODFL" "BKR" "NUE" "DFS" "DD" "EIX" "CSGP" "IRM"
## [262] "FANG" "TRGP" "CHTR" "AVB" "GLW" "MLM" "XYL" "VMC" "WTW"
## [271] "EBAY" "WEC" "MTD" "ON" "ROK" "HPQ" "PPG" "NDAQ" "TSCO"
## [280] "ADM" "HSY" "NVR" "BIIB" "CDW" "KHC" "LULU" "FITB" "AWK"
## [289] "DAL" "GPN" "GRMN" "MTB" "WAB" "PHM" "DXCM" "CAH" "ANSS"
## [298] "DVN" "IFF" "ETR" "SBAC" "CHD" "VTR" "DTE" "AXON" "EQR"
## [307] "HAL" "KEYS" "FTV" "MRNA" "STT" "DOV" "BR" "TTWO" "TYL"
## [316] "BRO" "VST" "STE" "LYB" "VLTO" "ES" "NTAP" "PPL" "TROW"
## [325] "FE" "DECK" "HPE" "SW" "WST" "CBOE" "WY" "RJF" "FSLR"
## [334] "AEE" "ZBH" "CPAY" "CINF" "GDDY" "COO" "K" "HBAN" "RF"
## [343] "LDOS" "MKC" "SMCI" "INVH" "CLX" "BLDR" "HUBB" "CMS" "EL"
## [352] "BALL" "WDC" "PTC" "TDY" "ATO" "BAX" "SYF" "WAT" "STX"
## [361] "OMC" "HOLX" "CFG" "ESS" "LH" "GPC" "TER" "BBY" "EQT"
## [370] "DRI" "MOH" "TSN" "APTV" "ULTA" "MAA" "ARE" "PKG" "J"
## [379] "NTRS" "WRB" "DG" "AVY" "LUV" "PFG" "CNP" "EXPD" "DGX"
## [388] "CTRA" "TXT" "MAS" "EXPE" "ZBRA" "EG" "IP" "STLD" "FDS"
## [397] "NRG" "WBD" "VRSN" "CCL" "AMCR" "UAL" "SWKS" "ALGN" "DOC"
## [406] "CAG" "KIM" "PODD" "KEY" "MRO" "NI" "LNT" "IEX" "SWK"
## [415] "LVS" "L" "SNA" "DPZ" "RVTY" "GEN" "BG" "AKAM" "CF"
## [424] "PNR" "ENPH" "DLTR" "JBHT" "ROL" "EVRG" "UHS" "UDR" "TRMB"
## [433] "LYV" "POOL" "VTRS" "NDSN" "CPT" "KMX" "JKHY" "JBL" "SJM"
## [442] "REG" "JNPR" "CE" "IPG" "FFIV" "HST" "ALLE" "EPAM" "CHRW"
## [451] "EMN" "TFX" "TECH" "AES" "CTLT" "LKQ" "HII" "BXP" "TAP"
## [460] "QRVO" "CPB" "AIZ" "NWSA" "PNW" "MTCH" "FOXA" "MKTX" "CRL"
## [469] "AOS" "TPR" "HRL" "SOLV" "ALB" "INCY" "LW" "HSIC" "APA"
## [478] "GL" "MGM" "GNRC" "DAY" "HAS" "FRT" "BF-B" "MOS" "DVA"
## [487] "PAYC" "FMC" "BWA" "MHK" "CZR" "WYNN" "AAL" "NCLH" "IVZ"
## [496] "RL" "BIO" "WBA" "BBWI" "ETSY" "BEN" "PARA" "FOX" "NWS"
1.6 Subset out stocks with no missing data
# For every date (row), count how many columns hold a missing value.
# rowSums() on the logical NA mask replaces apply(..., 1, sum): the counts are
# the same, computed in one vectorised call (stored as double, not integer).
missingcount_bydate <- rowSums(is.na(stocks_tsmatrix))
plot(x = stocks_tsmatrix$date, y = missingcount_bydate, type = "l")

# On 9/12/2022, 11 stocks had some missing values on whole period from date_start to date_end
# Define stocks_tsmatrix0 to hold only the stocks with no missing values
# For every column (the date column plus one column per stock), count the
# missing values. colSums() on the logical NA mask replaces
# apply(..., 2, sum): same named counts, computed in one vectorised call
# (stored as double, not integer).
missingcount_bystock <- colSums(is.na(stocks_tsmatrix))
# The x expression is left as written so the default axis label is unchanged.
plot(
  x = c(1:ncol(stocks_tsmatrix)),
  y = missingcount_bystock, type = "l"
)

# Number of columns that contain no missing value.
sum(missingcount_bystock == 0)
## [1] 488
# 488 columns (date + 487 stocks) had no missing values on period
# Positions of the columns (date included) whose missing-value count is zero.
which_cols_nomissing <- which(missingcount_bystock == 0)
# Extract data for stocks with no missing prices ----
dim(stocks_tsmatrix)
## [1] 1426 504
# Keep only those complete columns.
stocks_tsmatrix0 <- stocks_tsmatrix[which_cols_nomissing]
1.7 Create stocks_tbl0_tsmatrix0 corresponding to symbols in
stocks_tsmatrix0
# names(stocks_tsmatrix0)
# The price matrix has 1 (date) + (number of stocks) columns.
dim(stocks_tsmatrix0)
## [1] 1426 488
# For every stock column of the price matrix (all names except the leading
# date), find the row of stocks_tbl0 carrying the same symbol. A symbol
# without a match gets index 0, which selects no row.
which_symbols_tsmatrix0 <- match(
  x = names(stocks_tsmatrix0)[-1],
  table = stocks_tbl0$symbol,
  nomatch = 0
)
stocks_tbl0_tsmatrix0 <- stocks_tbl0[which_symbols_tsmatrix0, ]
# NOTE(review): this reports the price matrix again, not the table just built
# (that would be dim(stocks_tbl0_tsmatrix0)) — confirm which was intended.
dim(stocks_tsmatrix0)
## [1] 1426 488